library(tidyverse)
library(ggplot2)
library(lavaan)
library(car)
library(glmnet)
library(randomForestSRC)AAQoL machine learning analysis with unbalanced random forest
Data set
This data set is from the 2015 Asian American Quality of Life survey. Participants are from Austin, Texas.
Input data set
# Read the survey export and derive the analysis variables:
#   * every character column is converted to a factor;
#   * "Not at all" / "Chinese" become the reference levels of the two
#     English-proficiency items and of Ethnicity;
#   * Income brackets are collapsed into Income_median, with everything up to
#     "$50,000 - $59,999" coded "Below" and the two top brackets coded "Above"
#     (the cut point is hard-coded here, not computed from the data).
qol <- read_csv("AAQoL.csv") |>
  mutate(across(where(is.character), as.factor)) |>
  mutate(
    `English Difficulties` = relevel(`English Difficulties`, ref = "Not at all"),
    `English Speaking` = relevel(`English Speaking`, ref = "Not at all"),
    Ethnicity = relevel(Ethnicity, ref = "Chinese")
  ) |>
  mutate(
    Income_median = factor(
      case_match(
        Income,
        c(
          "$0 - $9,999",
          "$10,000 - $19,999",
          "$20,000 - $29,999",
          "$30,000 - $39,999",
          "$40,000 - $49,999",
          "$50,000 - $59,999"
        ) ~ "Below",
        c("$60,000 - $69,999", "$70,000 and over") ~ "Above",
        # Anything not listed (including NA) falls through unchanged and is
        # turned into NA by factor() below unless it is "Below"/"Above".
        .default = Income
      ),
      levels = c("Below", "Above")
    )
  )
New names:
Rows: 2609 Columns: 231
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(190): Gender, Ethnicity, Marital Status, No One, Spouse, Children, Gran... dbl
(41): Survey ID, Age, Education Completed, Household Size, Grandparent,...
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `Other` -> `Other...17`
• `Other` -> `Other...89`
qol |> DT::datatable()Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
Source of Information: Family
ps(Family)# A tibble: 4 × 3
Family n pct
<fct> <int> <dbl>
1 3 1 0.0383
2 No 1258 48.2
3 Yes 1331 51.0
4 <NA> 19 0.728
# Modelling frame for the "Family" outcome.
# Only the "No"/"Yes" answers are kept (this also removes NA and the stray
# "3" level), unused levels are dropped, and the three back-ticked predictors
# are renamed while selecting. Rows with missing predictors are NOT removed
# here (na.omit() is intentionally left out).
rfdata <- qol |>
  filter(Family %in% c("No", "Yes")) |>
  mutate(Family = droplevels(Family)) |>
  select(
    Family,
    Ethnicity,
    Age,
    Gender,
    Religion,
    Employment = `Full Time Employment`,
    Income_median,
    EnglishSpeak = `English Speaking`,
    EnglishDiff = `English Difficulties`,
    `See Family`:`Community Trust`
  ) |>
  as.data.frame()
# Grow the imbalanced-class forest on the full Family data set, scored by
# G-mean and split with the Gini rule; variable importance is requested.
# Alternative kept for reference:
#   rfsrc(Family ~ ., data = rfdata, importance = "permute",
#         perf.type = "gmean", block.size = 10)
rfobj <- imbalanced(
  formula = Family ~ .,
  data = rfdata,
  importance = TRUE,
  perf.type = "gmean",
  splitrule = "gini"
)
print(rfobj) Sample size: 2187
Frequency of class labels: 1069, 1118
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 481.122
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 1382
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 1.0458
(OOB) Brier score: 0.23047767
(OOB) Normalized Brier score: 0.92191067
(OOB) AUC: 0.65538154
(OOB) PR-AUC: 0.61755928
(OOB) G-mean: 0.61386047
(OOB) Requested performance error: 0.38613953
Confusion matrix:
predicted
observed No Yes class.error
No 707 362 0.3386
Yes 481 637 0.4302
(OOB) Misclassification rate: 0.3854595
print(rfobj) Sample size: 2187
Frequency of class labels: 1069, 1118
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 481.122
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 1382
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 1.0458
(OOB) Brier score: 0.23047767
(OOB) Normalized Brier score: 0.92191067
(OOB) AUC: 0.65538154
(OOB) PR-AUC: 0.61755928
(OOB) G-mean: 0.61386047
(OOB) Requested performance error: 0.38613953
Confusion matrix:
predicted
observed No Yes class.error
No 707 362 0.3386
Yes 481 637 0.4302
(OOB) Misclassification rate: 0.3854595
plot(rfobj,plots.one.page = FALSE)

all No Yes
Age 0.0322 NA NA
Ethnicity 0.0299 NA NA
EnglishSpeak 0.0113 NA NA
Gender 0.0107 NA NA
Religion 0.0092 NA NA
EnglishDiff 0.0090 NA NA
Get Along 0.0081 NA NA
Similar Values 0.0071 NA NA
Helpful Family 0.0071 NA NA
Religious Importance 0.0070 NA NA
Employment 0.0063 NA NA
Close Family 0.0062 NA NA
Spend Time Together 0.0061 NA NA
Community Trust 0.0056 NA NA
Community Shares Values 0.0046 NA NA
Helpful Community 0.0043 NA NA
Family Pride 0.0042 NA NA
Feel Close 0.0039 NA NA
Close-knit Community 0.0037 NA NA
Trust 0.0030 NA NA
Expression 0.0029 NA NA
Loyalty 0.0029 NA NA
See Friends 0.0027 NA NA
Successful Family 0.0020 NA NA
Income_median 0.0001 NA NA
Close Friends -0.0004 NA NA
rfobj$importance all No Yes
Ethnicity 0.0298736171 NA NA
Age 0.0321993406 NA NA
Gender 0.0107122858 NA NA
Religion 0.0091633231 NA NA
Employment 0.0062734484 NA NA
Income_median 0.0001492718 NA NA
EnglishSpeak 0.0112882719 NA NA
EnglishDiff 0.0089896934 NA NA
See Family -0.0009159657 NA NA
Close Family 0.0062397098 NA NA
Helpful Family 0.0071037309 NA NA
See Friends 0.0027465503 NA NA
Close Friends -0.0004087780 NA NA
Helpful Friends -0.0014438134 NA NA
Family Respect -0.0013574569 NA NA
Similar Values 0.0071457920 NA NA
Successful Family 0.0020479867 NA NA
Trust 0.0030046939 NA NA
Loyalty 0.0028684188 NA NA
Family Pride 0.0041954838 NA NA
Expression 0.0029314166 NA NA
Spend Time Together 0.0061082163 NA NA
Feel Close 0.0038668761 NA NA
Togetherness -0.0024044764 NA NA
Religious Attendance -0.0012261991 NA NA
Religious Importance 0.0069630886 NA NA
Close-knit Community 0.0037125808 NA NA
Helpful Community 0.0042750898 NA NA
Community Shares Values 0.0046403124 NA NA
Get Along 0.0081499187 NA NA
Community Trust 0.0056464934 NA NA
# Horizontal bar chart of the overall ("all") importance column of `rfobj`,
# ordered so the most important variable is drawn at the top.
var_importance <- rfobj$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_bw()
print(importance_plot)
Training/Test set Variable Importance
Training Importance
# Class-wise random split of the Family data: each row is sent to the test
# set with probability 0.3 (label 1) and to the training set otherwise
# (label 0). Labels are drawn separately within the "Yes" and "No" rows.
pos <- filter(rfdata, Family == "Yes")
neg <- filter(rfdata, Family == "No")

set.seed(222)
ind_pos <- sample(c(0, 1), size = nrow(pos), replace = TRUE, prob = c(0.7, 0.3))
ind_neg <- sample(c(0, 1), size = nrow(neg), replace = TRUE, prob = c(0.7, 0.3))

train <- bind_rows(
  pos[ind_pos == 0, ],
  neg[ind_neg == 0, ]
)
test <- bind_rows(
  pos[ind_pos == 1, ],
  neg[ind_neg == 1, ]
)
# Refit the same imbalanced forest, this time on the training rows only.
# Alternative kept for reference:
#   rfsrc(Family ~ ., data = rfdata, importance = "permute",
#         perf.type = "gmean", block.size = 10)
rfobj <- imbalanced(
  formula = Family ~ .,
  data = train,
  importance = TRUE,
  perf.type = "gmean",
  splitrule = "gini"
)
print(rfobj) Sample size: 1518
Frequency of class labels: 737, 781
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 338.252
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 959
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 1.0597
(OOB) Brier score: 0.23811998
(OOB) Normalized Brier score: 0.95247992
(OOB) AUC: 0.61983862
(OOB) PR-AUC: 0.57893367
(OOB) G-mean: 0.58067382
(OOB) Requested performance error: 0.41932618
Confusion matrix:
predicted
observed No Yes class.error
No 461 276 0.3745
Yes 360 421 0.4609
(OOB) Misclassification rate: 0.4189723
print(rfobj) Sample size: 1518
Frequency of class labels: 737, 781
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 338.252
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 959
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 1.0597
(OOB) Brier score: 0.23811998
(OOB) Normalized Brier score: 0.95247992
(OOB) AUC: 0.61983862
(OOB) PR-AUC: 0.57893367
(OOB) G-mean: 0.58067382
(OOB) Requested performance error: 0.41932618
Confusion matrix:
predicted
observed No Yes class.error
No 461 276 0.3745
Yes 360 421 0.4609
(OOB) Misclassification rate: 0.4189723
plot(rfobj,plots.one.page = FALSE)

all No Yes
Age 0.0470 NA NA
Ethnicity 0.0102 NA NA
Helpful Family 0.0069 NA NA
Community Shares Values 0.0053 NA NA
Religious Importance 0.0046 NA NA
Close-knit Community 0.0034 NA NA
Get Along 0.0024 NA NA
EnglishSpeak 0.0019 NA NA
Togetherness 0.0017 NA NA
Helpful Friends 0.0011 NA NA
Spend Time Together 0.0009 NA NA
Helpful Community 0.0004 NA NA
Close Family 0.0002 NA NA
Loyalty -0.0004 NA NA
Trust -0.0009 NA NA
Family Respect -0.0011 NA NA
EnglishDiff -0.0015 NA NA
Religion -0.0015 NA NA
Family Pride -0.0019 NA NA
Gender -0.0029 NA NA
Similar Values -0.0029 NA NA
Expression -0.0032 NA NA
Feel Close -0.0035 NA NA
Religious Attendance -0.0035 NA NA
Community Trust -0.0038 NA NA
Income_median -0.0045 NA NA
rfobj$importance all No Yes
Ethnicity 0.0102038208 NA NA
Age 0.0470496266 NA NA
Gender -0.0028532605 NA NA
Religion -0.0015283531 NA NA
Employment -0.0080802923 NA NA
Income_median -0.0045255872 NA NA
EnglishSpeak 0.0018924781 NA NA
EnglishDiff -0.0015283531 NA NA
See Family -0.0047036867 NA NA
Close Family 0.0001690678 NA NA
Helpful Family 0.0068515155 NA NA
See Friends -0.0075843402 NA NA
Close Friends -0.0093477945 NA NA
Helpful Friends 0.0011125622 NA NA
Family Respect -0.0010596669 NA NA
Similar Values -0.0029261995 NA NA
Successful Family -0.0075090251 NA NA
Trust -0.0008505768 NA NA
Loyalty -0.0003604139 NA NA
Family Pride -0.0018863303 NA NA
Expression -0.0032059623 NA NA
Spend Time Together 0.0009357273 NA NA
Feel Close -0.0034885651 NA NA
Togetherness 0.0017334100 NA NA
Religious Attendance -0.0035480431 NA NA
Religious Importance 0.0046485954 NA NA
Close-knit Community 0.0033922685 NA NA
Helpful Community 0.0004130311 NA NA
Community Shares Values 0.0053396877 NA NA
Get Along 0.0024330361 NA NA
Community Trust -0.0037665724 NA NA
# Horizontal bar chart of the overall ("all") importance column of the
# training-set forest, ordered so the largest value is drawn at the top.
var_importance <- rfobj$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_bw()
print(importance_plot)
Test Set Performance
# Score the held-out rows with the forest grown on `train`, then summarise
# performance from the prediction object. The previous call passed `rfobj`
# (the training forest), so it reported the training OOB metrics and never
# used `test_rf`.
test_rf <- predict(rfobj, newdata = test)
get.imbalanced.performance(test_rf)
# NOTE: the figures rendered below were captured from the earlier call on
# `rfobj` and match the training OOB output; re-render to refresh them.
n.majority n.minority iratio threshold sens spec
781.0000000 737.0000000 1.0597015 0.4855072 0.6255088 0.5390525
prec npv misclass brier brier.norm auc
0.5615104 0.6040172 0.4189723 0.2381200 0.9524799 0.6198386
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
0.5917843 0.5805264 0.4855072 0.5789337 0.5862291 0.5806001
gmean
0.5806738
Source of Information: Health Professionals
ps(`Heal Professionals`)# A tibble: 3 × 3
`Heal Professionals` n pct
<fct> <int> <dbl>
1 No 1326 50.8
2 Yes 1264 48.4
3 <NA> 19 0.728
# Modelling frame for the "Heal Professionals" outcome: pick the outcome and
# predictors (renaming the three back-ticked predictors on the way), then
# keep complete cases only.
rfdata <- qol |>
  select(
    `Heal Professionals`,
    Ethnicity,
    Age,
    Gender,
    Religion,
    Employment = `Full Time Employment`,
    Income_median,
    EnglishSpeak = `English Speaking`,
    EnglishDiff = `English Difficulties`,
    `See Family`:`Community Trust`
  ) |>
  na.omit() |>
  as.data.frame()
# Imbalanced-class forest for "Heal Professionals" on the full data set,
# scored by G-mean, Gini splitting, with variable importance.
rfobj <- imbalanced(
  formula = `Heal Professionals` ~ .,
  data = rfdata,
  importance = TRUE,
  perf.type = "gmean",
  splitrule = "gini"
)
print(rfobj) Sample size: 2188
Frequency of class labels: 1067, 1121
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 481.7493
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 1383
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 1.0506
(OOB) Brier score: 0.23121035
(OOB) Normalized Brier score: 0.92484139
(OOB) AUC: 0.65856211
(OOB) PR-AUC: 0.62968925
(OOB) G-mean: 0.61617778
(OOB) Requested performance error: 0.38382222
Confusion matrix:
predicted
observed No Yes class.error
No 662 405 0.3796
Yes 435 686 0.3880
(OOB) Misclassification rate: 0.3839122
plot(rfobj,plots.one.page = FALSE)

all No Yes
EnglishSpeak 0.0297 NA NA
Income_median 0.0077 NA NA
See Friends 0.0077 NA NA
Close Friends 0.0054 NA NA
See Family 0.0050 NA NA
Community Trust 0.0028 NA NA
Community Shares Values 0.0027 NA NA
Close-knit Community 0.0027 NA NA
Expression 0.0023 NA NA
Close Family 0.0013 NA NA
Helpful Friends 0.0013 NA NA
Age 0.0009 NA NA
Family Respect 0.0009 NA NA
Get Along 0.0005 NA NA
Employment 0.0004 NA NA
Togetherness 0.0002 NA NA
Similar Values 0.0000 NA NA
Trust -0.0004 NA NA
Helpful Community -0.0018 NA NA
Helpful Family -0.0023 NA NA
Gender -0.0023 NA NA
Successful Family -0.0031 NA NA
Loyalty -0.0032 NA NA
Religious Importance -0.0033 NA NA
Family Pride -0.0036 NA NA
Religious Attendance -0.0038 NA NA
rfobj$importance all No Yes
Ethnicity -6.229580e-03 NA NA
Age 8.846067e-04 NA NA
Gender -2.334744e-03 NA NA
Religion -4.843823e-03 NA NA
Employment 4.085398e-04 NA NA
Income_median 7.695117e-03 NA NA
EnglishSpeak 2.969744e-02 NA NA
EnglishDiff -5.583815e-03 NA NA
See Family 5.038137e-03 NA NA
Close Family 1.335206e-03 NA NA
Helpful Family -2.277969e-03 NA NA
See Friends 7.678630e-03 NA NA
Close Friends 5.405558e-03 NA NA
Helpful Friends 1.278098e-03 NA NA
Family Respect 8.601492e-04 NA NA
Similar Values 1.696055e-05 NA NA
Successful Family -3.091235e-03 NA NA
Trust -4.489456e-04 NA NA
Loyalty -3.208678e-03 NA NA
Family Pride -3.582459e-03 NA NA
Expression 2.278251e-03 NA NA
Spend Time Together -5.470183e-03 NA NA
Feel Close -4.070599e-03 NA NA
Togetherness 1.736981e-04 NA NA
Religious Attendance -3.752408e-03 NA NA
Religious Importance -3.296409e-03 NA NA
Close-knit Community 2.713520e-03 NA NA
Helpful Community -1.793826e-03 NA NA
Community Shares Values 2.743503e-03 NA NA
Get Along 4.832193e-04 NA NA
Community Trust 2.760539e-03 NA NA
# Horizontal bar chart of the overall ("all") importance column of `rfobj`,
# ordered so the most important variable is drawn at the top.
var_importance <- rfobj$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_bw()
print(importance_plot)
Training/Test set Variable Importance
Training Importance
# Class-wise random split for "Heal Professionals": each row goes to the test
# set with probability 0.3 (label 1), otherwise to the training set
# (label 0); labels are drawn separately for the "Yes" and "No" rows.
pos <- filter(rfdata, `Heal Professionals` == "Yes")
neg <- filter(rfdata, `Heal Professionals` == "No")

set.seed(222)
ind_pos <- sample(c(0, 1), size = nrow(pos), replace = TRUE, prob = c(0.7, 0.3))
ind_neg <- sample(c(0, 1), size = nrow(neg), replace = TRUE, prob = c(0.7, 0.3))

train <- bind_rows(
  pos[ind_pos == 0, ],
  neg[ind_neg == 0, ]
)
test <- bind_rows(
  pos[ind_pos == 1, ],
  neg[ind_neg == 1, ]
)
# Refit the imbalanced forest for "Heal Professionals" on the training rows.
# Alternative kept for reference (written for the Family outcome):
#   rfsrc(Family ~ ., data = rfdata, importance = "permute",
#         perf.type = "gmean", block.size = 10)
rfobj <- imbalanced(
  formula = `Heal Professionals` ~ .,
  data = train,
  importance = TRUE,
  perf.type = "gmean",
  splitrule = "gini"
)
print(rfobj) Sample size: 1523
Frequency of class labels: 735, 788
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 337.315
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 963
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 1.0721
(OOB) Brier score: 0.23687942
(OOB) Normalized Brier score: 0.94751767
(OOB) AUC: 0.63561932
(OOB) PR-AUC: 0.60730436
(OOB) G-mean: 0.60037858
(OOB) Requested performance error: 0.39962142
Confusion matrix:
predicted
observed No Yes class.error
No 448 287 0.3905
Yes 322 466 0.4086
(OOB) Misclassification rate: 0.3998687
print(rfobj) Sample size: 1523
Frequency of class labels: 735, 788
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 337.315
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 963
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 1.0721
(OOB) Brier score: 0.23687942
(OOB) Normalized Brier score: 0.94751767
(OOB) AUC: 0.63561932
(OOB) PR-AUC: 0.60730436
(OOB) G-mean: 0.60037858
(OOB) Requested performance error: 0.39962142
Confusion matrix:
predicted
observed No Yes class.error
No 448 287 0.3905
Yes 322 466 0.4086
(OOB) Misclassification rate: 0.3998687
plot(rfobj,plots.one.page = FALSE)

all No Yes
EnglishDiff 0.0152 NA NA
EnglishSpeak 0.0093 NA NA
Income_median 0.0091 NA NA
Gender 0.0026 NA NA
Spend Time Together 0.0007 NA NA
Community Trust 0.0006 NA NA
See Friends 0.0000 NA NA
Community Shares Values -0.0005 NA NA
Successful Family -0.0006 NA NA
Expression -0.0007 NA NA
Get Along -0.0011 NA NA
Loyalty -0.0013 NA NA
Close Friends -0.0014 NA NA
Similar Values -0.0019 NA NA
Close Family -0.0026 NA NA
Family Pride -0.0027 NA NA
Feel Close -0.0032 NA NA
See Family -0.0032 NA NA
Helpful Family -0.0032 NA NA
Religious Attendance -0.0033 NA NA
Togetherness -0.0038 NA NA
Religion -0.0058 NA NA
Family Respect -0.0060 NA NA
Employment -0.0066 NA NA
Helpful Friends -0.0073 NA NA
Helpful Community -0.0078 NA NA
rfobj$importance all No Yes
Ethnicity -1.119613e-02 NA NA
Age -9.199712e-03 NA NA
Gender 2.628499e-03 NA NA
Religion -5.806950e-03 NA NA
Employment -6.595408e-03 NA NA
Income_median 9.104856e-03 NA NA
EnglishSpeak 9.258181e-03 NA NA
EnglishDiff 1.515708e-02 NA NA
See Family -3.212321e-03 NA NA
Close Family -2.571226e-03 NA NA
Helpful Family -3.243786e-03 NA NA
See Friends -2.444395e-05 NA NA
Close Friends -1.429018e-03 NA NA
Helpful Friends -7.326022e-03 NA NA
Family Respect -5.986364e-03 NA NA
Similar Values -1.929449e-03 NA NA
Successful Family -6.151098e-04 NA NA
Trust -7.796050e-03 NA NA
Loyalty -1.286987e-03 NA NA
Family Pride -2.674305e-03 NA NA
Expression -6.696919e-04 NA NA
Spend Time Together 7.309024e-04 NA NA
Feel Close -3.212321e-03 NA NA
Togetherness -3.817017e-03 NA NA
Religious Attendance -3.272389e-03 NA NA
Religious Importance -1.390461e-02 NA NA
Close-knit Community -8.577679e-03 NA NA
Helpful Community -7.760562e-03 NA NA
Community Shares Values -5.490300e-04 NA NA
Get Along -1.107606e-03 NA NA
Community Trust 6.214982e-04 NA NA
# Horizontal bar chart of the overall ("all") importance column of the
# training-set forest, ordered so the largest value is drawn at the top.
var_importance <- rfobj$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_bw()
print(importance_plot)
Test Set Performance
# Score the held-out rows with the forest grown on `train`, then summarise
# performance from the prediction object. The previous call passed `rfobj`
# (the training forest), so it reported the training OOB metrics and never
# used `test_rf`.
test_rf <- predict(rfobj, newdata = test)
get.imbalanced.performance(test_rf)
# NOTE: the figures rendered below were captured from the earlier call on
# `rfobj` and match the training OOB output; re-render to refresh them.
n.majority n.minority iratio threshold sens spec
788.0000000 735.0000000 1.0721088 0.4826001 0.6095238 0.5913706
prec npv misclass brier brier.norm auc
0.5818182 0.6188579 0.3998687 0.2368794 0.9475177 0.6356193
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
0.5953488 0.6000382 0.4826001 0.6073044 0.5978637 0.6002084
gmean
0.6003786
Health Insurance
ps(`Health Insurance`)# A tibble: 3 × 3
`Health Insurance` n pct
<fct> <int> <dbl>
1 0 381 14.6
2 Yes 2207 84.6
3 <NA> 21 0.805
Random Forest (randomForestSRC)
# install.packages("randomForestSRC")  # run once if the package is missing
# Modelling frame for the "Health Insurance" outcome: pick the outcome and
# predictors (renaming the three back-ticked predictors on the way), then
# keep complete cases only.
rfdata <- qol |>
  select(
    `Health Insurance`,
    Ethnicity,
    Age,
    Gender,
    Religion,
    Employment = `Full Time Employment`,
    Income_median,
    EnglishSpeak = `English Speaking`,
    EnglishDiff = `English Difficulties`,
    `See Family`:`Community Trust`
  ) |>
  na.omit() |>
  as.data.frame()
# Imbalanced-class forest for "Health Insurance" on the full data set,
# scored by G-mean, Gini splitting, with variable importance.
imb <- imbalanced(
  formula = `Health Insurance` ~ .,
  data = rfdata,
  importance = TRUE,
  perf.type = "gmean",
  splitrule = "gini"
)
print(imb) Sample size: 2189
Frequency of class labels: 292, 1897
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 258.4477
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 1383
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 6.4966
(OOB) Brier score: 0.1037283
(OOB) Normalized Brier score: 0.41491322
(OOB) AUC: 0.7475827
(OOB) PR-AUC: 0.33023442
(OOB) G-mean: 0.66755909
(OOB) Requested performance error: 0.33244091
Confusion matrix:
predicted
observed 0 Yes class.error
0 224 68 0.2329
Yes 795 1102 0.4191
(OOB) Misclassification rate: 0.3942439
get.imbalanced.performance(imb) n.majority n.minority iratio threshold sens spec
1897.0000000 292.0000000 6.4965753 0.1333942 0.7671233 0.5809172
prec npv misclass brier brier.norm auc
0.2198234 0.9418803 0.3942439 0.1037283 0.4149132 0.7475827
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
0.3417239 0.4631881 0.1333942 0.3302344 0.5046415 0.5653736
gmean
0.6675591
# Horizontal bar chart of the overall ("all") importance column of `imb`,
# ordered so the most important variable is drawn at the top.
var_importance <- imb$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_minimal()
print(importance_plot)
Training/Test set Variable Importance
Training Importance
# Class-wise random split for "Health Insurance": each row goes to the test
# set with probability 0.3 (label 1), otherwise to the training set (label 0).
#
# The negative class of this variable is coded "0", not "No" (see its
# frequency table and the confusion-matrix labels). Filtering on "No" returned
# zero rows, which left `train` with a single class and produced the
# "empty classes found" warning and all-zero importance in the output that was
# rendered before this fix.
pos <- filter(rfdata, `Health Insurance` == "Yes")
neg <- filter(rfdata, `Health Insurance` == "0")

# Fail fast if either class is missing, instead of fitting a degenerate forest.
stopifnot(nrow(pos) > 0, nrow(neg) > 0)

set.seed(222)
ind_pos <- sample(c(0, 1), size = nrow(pos), replace = TRUE, prob = c(0.7, 0.3))
ind_neg <- sample(c(0, 1), size = nrow(neg), replace = TRUE, prob = c(0.7, 0.3))

train <- bind_rows(pos[ind_pos == 0, ], neg[ind_neg == 0, ])
test <- bind_rows(pos[ind_pos == 1, ], neg[ind_neg == 1, ])
# rfsrc(Family~.,data=rfdata, importance="permute", perf.type="gmean",block.size = 10) ->rfobj
rfobj <- imbalanced(`Health Insurance` ~ .,importance=T,data=train,
perf.type = "gmean",splitrule="gini")Warning in rfsrc(formula = `Health Insurance` ~ ., data = structure(list(: empty classes found when implementing classification
print(rfobj) Sample size: 1332
Frequency of class labels: NA, 1332
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 842
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 1332 0 1
(OOB) Misclassification rate: 1
print(rfobj) Sample size: 1332
Frequency of class labels: NA, 1332
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 842
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 1332 0 1
(OOB) Misclassification rate: 1
plot(rfobj,plots.one.page = FALSE)

all 0 Yes
Community Trust 0 NA NA
Get Along 0 NA NA
Community Shares Values 0 NA NA
Helpful Community 0 NA NA
Close-knit Community 0 NA NA
Religious Importance 0 NA NA
Religious Attendance 0 NA NA
Togetherness 0 NA NA
Feel Close 0 NA NA
Spend Time Together 0 NA NA
Expression 0 NA NA
Family Pride 0 NA NA
Loyalty 0 NA NA
Trust 0 NA NA
Successful Family 0 NA NA
Similar Values 0 NA NA
Family Respect 0 NA NA
Helpful Friends 0 NA NA
Close Friends 0 NA NA
See Friends 0 NA NA
Helpful Family 0 NA NA
Close Family 0 NA NA
See Family 0 NA NA
EnglishDiff 0 NA NA
EnglishSpeak 0 NA NA
Income_median 0 NA NA
rfobj$importance all 0 Yes
Ethnicity 0 NA NA
Age 0 NA NA
Gender 0 NA NA
Religion 0 NA NA
Employment 0 NA NA
Income_median 0 NA NA
EnglishSpeak 0 NA NA
EnglishDiff 0 NA NA
See Family 0 NA NA
Close Family 0 NA NA
Helpful Family 0 NA NA
See Friends 0 NA NA
Close Friends 0 NA NA
Helpful Friends 0 NA NA
Family Respect 0 NA NA
Similar Values 0 NA NA
Successful Family 0 NA NA
Trust 0 NA NA
Loyalty 0 NA NA
Family Pride 0 NA NA
Expression 0 NA NA
Spend Time Together 0 NA NA
Feel Close 0 NA NA
Togetherness 0 NA NA
Religious Attendance 0 NA NA
Religious Importance 0 NA NA
Close-knit Community 0 NA NA
Helpful Community 0 NA NA
Community Shares Values 0 NA NA
Get Along 0 NA NA
Community Trust 0 NA NA
# Horizontal bar chart of the overall ("all") importance column of the
# training-set forest, ordered so the largest value is drawn at the top.
var_importance <- rfobj$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_bw()
print(importance_plot)
Test Set Performance
# Score the held-out rows with the forest grown on `train`, then summarise
# performance from the prediction object. The previous call passed `rfobj`
# (the training forest), so `test_rf` was never used and the summary
# described the training fit.
test_rf <- predict(rfobj, newdata = test)
get.imbalanced.performance(test_rf)
# NOTE: the figures rendered below were captured from the earlier call on
# `rfobj` (n.majority equals the training sample size); re-render to refresh.
n.majority n.minority iratio threshold sens spec
1332 0 Inf 0 NaN 0
prec npv misclass brier brier.norm auc
0 NaN 1 0 0 NaN
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
NaN NaN NA NA NaN NaN
gmean
NaN
Dental Insurance
ps(`Dental Insurance`)# A tibble: 3 × 3
`Dental Insurance` n pct
<fct> <int> <dbl>
1 0 1050 40.2
2 Yes 1529 58.6
3 <NA> 30 1.15
Random Forest (randomForestSRC)
# install.packages("randomForestSRC")  # run once if the package is missing
# Modelling frame for the "Dental Insurance" outcome: pick the outcome and
# predictors (renaming the three back-ticked predictors on the way), then
# keep complete cases only.
rfdata <- qol |>
  select(
    `Dental Insurance`,
    Ethnicity,
    Age,
    Gender,
    Religion,
    Employment = `Full Time Employment`,
    Income_median,
    EnglishSpeak = `English Speaking`,
    EnglishDiff = `English Difficulties`,
    `See Family`:`Community Trust`
  ) |>
  na.omit() |>
  as.data.frame()
# Imbalanced-class forest for "Dental Insurance" on the full data set,
# scored by G-mean, Gini splitting, with variable importance.
imb <- imbalanced(
  formula = `Dental Insurance` ~ .,
  data = rfdata,
  importance = TRUE,
  perf.type = "gmean",
  splitrule = "gini"
)
print(imb) Sample size: 2184
Frequency of class labels: 849, 1335
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 397.2453
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 1380
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 1.5724
(OOB) Brier score: 0.17717336
(OOB) Normalized Brier score: 0.70869343
(OOB) AUC: 0.79752209
(OOB) PR-AUC: 0.70202991
(OOB) G-mean: 0.72735508
(OOB) Requested performance error: 0.27264492
Confusion matrix:
predicted
observed 0 Yes class.error
0 642 207 0.2438
Yes 401 934 0.3004
(OOB) Misclassification rate: 0.2783883
get.imbalanced.performance(imb) n.majority n.minority iratio threshold sens spec
1335.0000000 849.0000000 1.5724382 0.3887363 0.7561837 0.6996255
prec npv misclass brier brier.norm auc
0.6155321 0.8185802 0.2783883 0.1771734 0.7086934 0.7975221
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
0.6786469 0.7145404 0.3887363 0.7020299 0.7030010 0.7209477
gmean
0.7273551
# Horizontal bar chart of the overall ("all") importance column of `imb`,
# ordered so the most important variable is drawn at the top.
var_importance <- imb$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_minimal()
print(importance_plot)
Training/Test set Variable Importance
Training Importance
# Class-wise random split for "Dental Insurance": each row goes to the test
# set with probability 0.3 (label 1), otherwise to the training set (label 0).
#
# The negative class of this variable is coded "0", not "No" (see its
# frequency table and the confusion-matrix labels). Filtering on "No" returned
# zero rows, which left `train` with a single class and produced the
# "empty classes found" warning and all-zero importance in the output that was
# rendered before this fix.
pos <- filter(rfdata, `Dental Insurance` == "Yes")
neg <- filter(rfdata, `Dental Insurance` == "0")

# Fail fast if either class is missing, instead of fitting a degenerate forest.
stopifnot(nrow(pos) > 0, nrow(neg) > 0)

set.seed(222)
ind_pos <- sample(c(0, 1), size = nrow(pos), replace = TRUE, prob = c(0.7, 0.3))
ind_neg <- sample(c(0, 1), size = nrow(neg), replace = TRUE, prob = c(0.7, 0.3))

train <- bind_rows(pos[ind_pos == 0, ], neg[ind_neg == 0, ])
test <- bind_rows(pos[ind_pos == 1, ], neg[ind_neg == 1, ])
# rfsrc(Family~.,data=rfdata, importance="permute", perf.type="gmean",block.size = 10) ->rfobj
rfobj <- imbalanced(`Dental Insurance` ~ .,importance=T,data=train,
perf.type = "gmean",splitrule="gini")Warning in rfsrc(formula = `Dental Insurance` ~ ., data = structure(list(: empty classes found when implementing classification
print(rfobj) Sample size: 932
Frequency of class labels: NA, 932
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 589
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 932 0 1
(OOB) Misclassification rate: 1
print(rfobj) Sample size: 932
Frequency of class labels: NA, 932
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 589
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 932 0 1
(OOB) Misclassification rate: 1
plot(rfobj,plots.one.page = FALSE)

all 0 Yes
Community Trust 0 NA NA
Get Along 0 NA NA
Community Shares Values 0 NA NA
Helpful Community 0 NA NA
Close-knit Community 0 NA NA
Religious Importance 0 NA NA
Religious Attendance 0 NA NA
Togetherness 0 NA NA
Feel Close 0 NA NA
Spend Time Together 0 NA NA
Expression 0 NA NA
Family Pride 0 NA NA
Loyalty 0 NA NA
Trust 0 NA NA
Successful Family 0 NA NA
Similar Values 0 NA NA
Family Respect 0 NA NA
Helpful Friends 0 NA NA
Close Friends 0 NA NA
See Friends 0 NA NA
Helpful Family 0 NA NA
Close Family 0 NA NA
See Family 0 NA NA
EnglishDiff 0 NA NA
EnglishSpeak 0 NA NA
Income_median 0 NA NA
rfobj$importance all 0 Yes
Ethnicity 0 NA NA
Age 0 NA NA
Gender 0 NA NA
Religion 0 NA NA
Employment 0 NA NA
Income_median 0 NA NA
EnglishSpeak 0 NA NA
EnglishDiff 0 NA NA
See Family 0 NA NA
Close Family 0 NA NA
Helpful Family 0 NA NA
See Friends 0 NA NA
Close Friends 0 NA NA
Helpful Friends 0 NA NA
Family Respect 0 NA NA
Similar Values 0 NA NA
Successful Family 0 NA NA
Trust 0 NA NA
Loyalty 0 NA NA
Family Pride 0 NA NA
Expression 0 NA NA
Spend Time Together 0 NA NA
Feel Close 0 NA NA
Togetherness 0 NA NA
Religious Attendance 0 NA NA
Religious Importance 0 NA NA
Close-knit Community 0 NA NA
Helpful Community 0 NA NA
Community Shares Values 0 NA NA
Get Along 0 NA NA
Community Trust 0 NA NA
# Horizontal bar chart of the overall ("all") importance column of the
# training-set forest, ordered so the largest value is drawn at the top.
var_importance <- rfobj$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_bw()
print(importance_plot)
Test Set Performance
# Score the held-out rows with the forest grown on `train`, then summarise
# performance from the prediction object. The previous call passed `rfobj`
# (the training forest), so `test_rf` was never used and the summary
# described the training fit.
test_rf <- predict(rfobj, newdata = test)
get.imbalanced.performance(test_rf)
# NOTE: the figures rendered below were captured from the earlier call on
# `rfobj` (n.majority equals the training sample size); re-render to refresh.
n.majority n.minority iratio threshold sens spec
932 0 Inf 0 NaN 0
prec npv misclass brier brier.norm auc
0 NaN 1 0 0 NaN
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
NaN NaN NA NA NaN NaN
gmean
NaN
Physical Checkup
ps(`Physical Check-up`)# A tibble: 3 × 3
`Physical Check-up` n pct
<fct> <int> <dbl>
1 0 833 31.9
2 Yes 1740 66.7
3 <NA> 36 1.38
Random Forest (randomForestSRC)
# install.packages("randomForestSRC")
# Modelling frame for the Physical Check-up outcome: keep the outcome and the
# selected predictors, drop rows with any missing value, shorten three column
# names, and convert to a plain data.frame before fitting.
rfdata <- qol |>
  select(
    `Physical Check-up`, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median, `English Speaking`,
    `English Difficulties`, `See Family`:`Community Trust`
  ) |>
  na.omit() |>
  rename(
    Employment = `Full Time Employment`,
    EnglishSpeak = `English Speaking`,
    EnglishDiff = `English Difficulties`
  ) |>
  as.data.frame()

# Imbalanced two-class forest on the full modelling frame, with variable
# importance requested and G-mean as the reported performance measure.
imb <- imbalanced(
  `Physical Check-up` ~ .,
  data = rfdata,
  importance = TRUE,
  perf.type = "gmean",
  splitrule = "gini"
)
print(imb) Sample size: 2178
Frequency of class labels: 704, 1474
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 425.347
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 1376
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 2.0938
(OOB) Brier score: 0.19839173
(OOB) Normalized Brier score: 0.79356692
(OOB) AUC: 0.68853017
(OOB) PR-AUC: 0.49762209
(OOB) G-mean: 0.63818128
(OOB) Requested performance error: 0.36181872
Confusion matrix:
predicted
observed 0 Yes class.error
0 492 212 0.3011
Yes 615 859 0.4172
(OOB) Misclassification rate: 0.3797062
# Draw the default diagnostic plots for the full-data forest without combining
# them onto a single page.
plot(imb, plots.one.page = FALSE)

all 0 Yes
Age 0.0537 NA NA
Income_median 0.0194 NA NA
Ethnicity 0.0109 NA NA
Expression 0.0071 NA NA
Employment 0.0048 NA NA
Religious Importance 0.0040 NA NA
Loyalty 0.0036 NA NA
Get Along 0.0026 NA NA
Gender 0.0022 NA NA
EnglishDiff 0.0012 NA NA
EnglishSpeak 0.0010 NA NA
Helpful Friends 0.0002 NA NA
Religious Attendance -0.0002 NA NA
Family Pride -0.0003 NA NA
Religion -0.0006 NA NA
Successful Family -0.0012 NA NA
Togetherness -0.0013 NA NA
Close-knit Community -0.0017 NA NA
Spend Time Together -0.0019 NA NA
Helpful Family -0.0019 NA NA
Close Family -0.0020 NA NA
See Family -0.0026 NA NA
Feel Close -0.0028 NA NA
Helpful Community -0.0034 NA NA
Similar Values -0.0036 NA NA
Community Trust -0.0046 NA NA
get.imbalanced.performance(imb) n.majority n.minority iratio threshold sens spec
1474.0000000 704.0000000 2.0937500 0.3232323 0.6988636 0.5827680
prec npv misclass brier brier.norm auc
0.4444444 0.8020542 0.3797062 0.1983917 0.7935669 0.6885302
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
0.5433462 0.6020794 0.3232323 0.4976221 0.5907637 0.6201304
gmean
0.6381813
# Horizontal bar chart of the overall ("all" column) variable importance from
# the full-data forest `imb`, with bars ordered by importance.
var_importance <- imb$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_minimal()
print(importance_plot)
Training/Test set Variable Importance
Training Importance
# Stratified ~70/30 train/test split: rows of each outcome class are assigned
# independently so that both classes end up in `train` and in `test`.
# The outcome factor is coded "0"/"Yes" (see the ps() tabulation for this
# outcome), so the negative class must be matched on "0". Matching on "No"
# selects zero rows and leaves `train` with a single class.
pos <- rfdata |> filter(`Physical Check-up` == "Yes")
neg <- rfdata |> filter(`Physical Check-up` == "0")
# Fail fast if either class is missing instead of fitting a degenerate forest.
stopifnot(nrow(pos) > 0, nrow(neg) > 0)
set.seed(222)
ind_pos <- sample(c(0, 1), nrow(pos), replace = TRUE, prob = c(0.7, 0.3))
ind_neg <- sample(c(0, 1), nrow(neg), replace = TRUE, prob = c(0.7, 0.3))
train <- bind_rows(pos[ind_pos == 0, ], neg[ind_neg == 0, ])
test <- bind_rows(pos[ind_pos == 1, ], neg[ind_neg == 1, ])
# Commented-out alternative rfsrc() call; note its formula targets `Family`
# rather than this section's outcome.
# rfsrc(Family~.,data=rfdata, importance="permute", perf.type="gmean",block.size = 10) ->rfobj
# Fit the imbalanced forest on the training split only (gini splitting,
# G-mean performance measure, variable importance requested).
# NOTE(review): the warning and printout captured in this rendering report an
# empty outcome class in `train`; confirm both classes are present and re-render.
rfobj <- imbalanced(`Physical Check-up` ~ .,importance=T,data=train,
perf.type = "gmean",splitrule="gini")Warning in rfsrc(formula = `Physical Check-up` ~ ., data = structure(list(: empty classes found when implementing classification
print(rfobj) Sample size: 1028
Frequency of class labels: NA, 1028
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 650
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 1028 0 1
(OOB) Misclassification rate: 1
print(rfobj) Sample size: 1028
Frequency of class labels: NA, 1028
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 650
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 1028 0 1
(OOB) Misclassification rate: 1
plot(rfobj,plots.one.page = FALSE)

all 0 Yes
Community Trust 0 NA NA
Get Along 0 NA NA
Community Shares Values 0 NA NA
Helpful Community 0 NA NA
Close-knit Community 0 NA NA
Religious Importance 0 NA NA
Religious Attendance 0 NA NA
Togetherness 0 NA NA
Feel Close 0 NA NA
Spend Time Together 0 NA NA
Expression 0 NA NA
Family Pride 0 NA NA
Loyalty 0 NA NA
Trust 0 NA NA
Successful Family 0 NA NA
Similar Values 0 NA NA
Family Respect 0 NA NA
Helpful Friends 0 NA NA
Close Friends 0 NA NA
See Friends 0 NA NA
Helpful Family 0 NA NA
Close Family 0 NA NA
See Family 0 NA NA
EnglishDiff 0 NA NA
EnglishSpeak 0 NA NA
Income_median 0 NA NA
rfobj$importance all 0 Yes
Ethnicity 0 NA NA
Age 0 NA NA
Gender 0 NA NA
Religion 0 NA NA
Employment 0 NA NA
Income_median 0 NA NA
EnglishSpeak 0 NA NA
EnglishDiff 0 NA NA
See Family 0 NA NA
Close Family 0 NA NA
Helpful Family 0 NA NA
See Friends 0 NA NA
Close Friends 0 NA NA
Helpful Friends 0 NA NA
Family Respect 0 NA NA
Similar Values 0 NA NA
Successful Family 0 NA NA
Trust 0 NA NA
Loyalty 0 NA NA
Family Pride 0 NA NA
Expression 0 NA NA
Spend Time Together 0 NA NA
Feel Close 0 NA NA
Togetherness 0 NA NA
Religious Attendance 0 NA NA
Religious Importance 0 NA NA
Close-knit Community 0 NA NA
Helpful Community 0 NA NA
Community Shares Values 0 NA NA
Get Along 0 NA NA
Community Trust 0 NA NA
# Horizontal bar chart of the overall ("all" column) variable importance
# stored on the training-split forest `rfobj`, with bars ordered by importance.
var_importance <- rfobj$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_bw()
print(importance_plot)
Test Set Importance
# Score the held-out `test` rows with the training-split forest and report the
# imbalanced-classification metrics for those predictions.
# The performance call must receive the prediction object: passing `rfobj`
# re-reports the training (OOB) metrics and leaves `test_rf` unused.
test_rf <- predict(rfobj, newdata = test)
get.imbalanced.performance(test_rf) n.majority n.minority iratio threshold sens spec
1028 0 Inf 0 NaN 0
prec npv misclass brier brier.norm auc
0 NaN 1 0 0 NaN
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
NaN NaN NA NA NaN NaN
gmean
NaN
Dental Checkup
ps(`Dentist Check-up`)# A tibble: 3 × 3
`Dentist Check-up` n pct
<fct> <int> <dbl>
1 0 1100 42.2
2 Yes 1462 56.0
3 <NA> 47 1.80
Random Forest (randomForestSRC)
# install.packages("randomForestSRC")
# Modelling frame for the Dentist Check-up outcome: keep the outcome and the
# selected predictors, drop rows with any missing value, shorten three column
# names, and convert to a plain data.frame before fitting.
rfdata <- qol |>
  select(
    `Dentist Check-up`, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median, `English Speaking`,
    `English Difficulties`, `See Family`:`Community Trust`
  ) |>
  na.omit() |>
  rename(
    Employment = `Full Time Employment`,
    EnglishSpeak = `English Speaking`,
    EnglishDiff = `English Difficulties`
  ) |>
  as.data.frame()

# Imbalanced two-class forest on the full modelling frame, with variable
# importance requested and G-mean as the reported performance measure.
imb <- imbalanced(
  `Dentist Check-up` ~ .,
  data = rfdata,
  importance = TRUE,
  perf.type = "gmean",
  splitrule = "gini"
)
print(imb) Sample size: 2175
Frequency of class labels: 896, 1279
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 450.2923
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 1375
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 1.4275
(OOB) Brier score: 0.21233082
(OOB) Normalized Brier score: 0.84932329
(OOB) AUC: 0.70795578
(OOB) PR-AUC: 0.60124704
(OOB) G-mean: 0.65641478
(OOB) Requested performance error: 0.34358522
Confusion matrix:
predicted
observed 0 Yes class.error
0 618 278 0.3103
Yes 480 799 0.3753
(OOB) Misclassification rate: 0.3485057
# Draw the default diagnostic plots for the full-data forest without combining
# them onto a single page.
plot(imb, plots.one.page = FALSE)

all 0 Yes
EnglishSpeak 0.0169 NA NA
Age 0.0144 NA NA
EnglishDiff 0.0103 NA NA
Employment 0.0072 NA NA
Gender 0.0071 NA NA
Similar Values 0.0070 NA NA
Income_median 0.0051 NA NA
Helpful Family 0.0049 NA NA
Religious Importance 0.0047 NA NA
Expression 0.0038 NA NA
Togetherness 0.0035 NA NA
Helpful Friends 0.0033 NA NA
See Friends 0.0032 NA NA
Successful Family 0.0031 NA NA
Family Pride 0.0029 NA NA
Loyalty 0.0017 NA NA
See Family 0.0016 NA NA
Feel Close 0.0005 NA NA
Close Family 0.0004 NA NA
Get Along 0.0003 NA NA
Ethnicity 0.0003 NA NA
Community Trust 0.0001 NA NA
Religious Attendance 0.0000 NA NA
Spend Time Together -0.0001 NA NA
Close Friends -0.0010 NA NA
Religion -0.0028 NA NA
get.imbalanced.performance(imb) n.majority n.minority iratio threshold sens spec
1279.0000000 896.0000000 1.4274554 0.4119540 0.6897321 0.6247068
prec npv misclass brier brier.norm auc
0.5628415 0.7418756 0.3485057 0.2123308 0.8493233 0.7079558
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
0.6198596 0.6477499 0.4119540 0.6012470 0.6381372 0.6520823
gmean
0.6564148
# Horizontal bar chart of the overall ("all" column) variable importance from
# the full-data forest `imb`, with bars ordered by importance.
var_importance <- imb$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_minimal()
print(importance_plot)
Training/Test set Variable Importance
Training Importance
# Stratified ~70/30 train/test split: rows of each outcome class are assigned
# independently so that both classes end up in `train` and in `test`.
# The outcome factor is coded "0"/"Yes" (see the ps() tabulation for this
# outcome), so the negative class must be matched on "0". Matching on "No"
# selects zero rows and leaves `train` with a single class.
pos <- rfdata |> filter(`Dentist Check-up` == "Yes")
neg <- rfdata |> filter(`Dentist Check-up` == "0")
# Fail fast if either class is missing instead of fitting a degenerate forest.
stopifnot(nrow(pos) > 0, nrow(neg) > 0)
set.seed(222)
ind_pos <- sample(c(0, 1), nrow(pos), replace = TRUE, prob = c(0.7, 0.3))
ind_neg <- sample(c(0, 1), nrow(neg), replace = TRUE, prob = c(0.7, 0.3))
train <- bind_rows(pos[ind_pos == 0, ], neg[ind_neg == 0, ])
test <- bind_rows(pos[ind_pos == 1, ], neg[ind_neg == 1, ])
# Commented-out alternative rfsrc() call; note its formula targets `Family`
# rather than this section's outcome.
# rfsrc(Family~.,data=rfdata, importance="permute", perf.type="gmean",block.size = 10) ->rfobj
# Fit the imbalanced forest on the training split only (gini splitting,
# G-mean performance measure, variable importance requested).
# NOTE(review): the warning and printout captured in this rendering report an
# empty outcome class in `train`; confirm both classes are present and re-render.
rfobj <- imbalanced(`Dentist Check-up` ~ .,importance=T,data=train,
perf.type = "gmean",splitrule="gini")Warning in rfsrc(formula = `Dentist Check-up` ~ ., data = structure(list(: empty classes found when implementing classification
print(rfobj) Sample size: 896
Frequency of class labels: NA, 896
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 566
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 896 0 1
(OOB) Misclassification rate: 1
print(rfobj) Sample size: 896
Frequency of class labels: NA, 896
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 566
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 896 0 1
(OOB) Misclassification rate: 1
plot(rfobj,plots.one.page = FALSE)

all 0 Yes
Community Trust 0 NA NA
Get Along 0 NA NA
Community Shares Values 0 NA NA
Helpful Community 0 NA NA
Close-knit Community 0 NA NA
Religious Importance 0 NA NA
Religious Attendance 0 NA NA
Togetherness 0 NA NA
Feel Close 0 NA NA
Spend Time Together 0 NA NA
Expression 0 NA NA
Family Pride 0 NA NA
Loyalty 0 NA NA
Trust 0 NA NA
Successful Family 0 NA NA
Similar Values 0 NA NA
Family Respect 0 NA NA
Helpful Friends 0 NA NA
Close Friends 0 NA NA
See Friends 0 NA NA
Helpful Family 0 NA NA
Close Family 0 NA NA
See Family 0 NA NA
EnglishDiff 0 NA NA
EnglishSpeak 0 NA NA
Income_median 0 NA NA
rfobj$importance all 0 Yes
Ethnicity 0 NA NA
Age 0 NA NA
Gender 0 NA NA
Religion 0 NA NA
Employment 0 NA NA
Income_median 0 NA NA
EnglishSpeak 0 NA NA
EnglishDiff 0 NA NA
See Family 0 NA NA
Close Family 0 NA NA
Helpful Family 0 NA NA
See Friends 0 NA NA
Close Friends 0 NA NA
Helpful Friends 0 NA NA
Family Respect 0 NA NA
Similar Values 0 NA NA
Successful Family 0 NA NA
Trust 0 NA NA
Loyalty 0 NA NA
Family Pride 0 NA NA
Expression 0 NA NA
Spend Time Together 0 NA NA
Feel Close 0 NA NA
Togetherness 0 NA NA
Religious Attendance 0 NA NA
Religious Importance 0 NA NA
Close-knit Community 0 NA NA
Helpful Community 0 NA NA
Community Shares Values 0 NA NA
Get Along 0 NA NA
Community Trust 0 NA NA
# Horizontal bar chart of the overall ("all" column) variable importance
# stored on the training-split forest `rfobj`, with bars ordered by importance.
var_importance <- rfobj$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_bw()
print(importance_plot)
Test Set Importance
# Score the held-out `test` rows with the training-split forest and report the
# imbalanced-classification metrics for those predictions.
# The performance call must receive the prediction object: passing `rfobj`
# re-reports the training (OOB) metrics and leaves `test_rf` unused.
test_rf <- predict(rfobj, newdata = test)
get.imbalanced.performance(test_rf) n.majority n.minority iratio threshold sens spec
896 0 Inf 0 NaN 0
prec npv misclass brier brier.norm auc
0 NaN 1 0 0 NaN
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
NaN NaN NA NA NaN NaN
gmean
NaN
Urgent Care
ps(`Urgentcare`)# A tibble: 3 × 3
Urgentcare n pct
<fct> <int> <dbl>
1 0 2112 81.0
2 Yes 440 16.9
3 <NA> 57 2.18
Random Forest (randomForestSRC)
# install.packages("randomForestSRC")
# Modelling frame for the Urgentcare outcome: keep the outcome and the
# selected predictors, drop rows with any missing value, shorten three column
# names, and convert to a plain data.frame before fitting.
rfdata <- qol |>
  select(
    Urgentcare, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median, `English Speaking`,
    `English Difficulties`, `See Family`:`Community Trust`
  ) |>
  na.omit() |>
  rename(
    Employment = `Full Time Employment`,
    EnglishSpeak = `English Speaking`,
    EnglishDiff = `English Difficulties`
  ) |>
  as.data.frame()

# Imbalanced two-class forest on the full modelling frame, with variable
# importance requested and G-mean as the reported performance measure.
imb <- imbalanced(
  Urgentcare ~ .,
  data = rfdata,
  importance = TRUE,
  perf.type = "gmean",
  splitrule = "gini"
)
print(imb) Sample size: 2167
Frequency of class labels: 1808, 359
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 326.3993
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 1370
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 5.0362
(OOB) Brier score: 0.13731167
(OOB) Normalized Brier score: 0.54924669
(OOB) AUC: 0.59106078
(OOB) PR-AUC: 0.23239721
(OOB) G-mean: 0.5633998
(OOB) Requested performance error: 0.4366002
Confusion matrix:
predicted
observed 0 Yes class.error
0 873 935 0.5171
Yes 123 236 0.3426
(OOB) Misclassification rate: 0.4882326
# Draw the default diagnostic plots for the full-data forest without combining
# them onto a single page.
plot(imb, plots.one.page = FALSE)

all 0 Yes
Ethnicity 0.0307 NA NA
Close Family 0.0301 NA NA
Age 0.0256 NA NA
Trust 0.0212 NA NA
EnglishSpeak 0.0189 NA NA
Get Along 0.0179 NA NA
Religion 0.0179 NA NA
Close Friends 0.0173 NA NA
Loyalty 0.0170 NA NA
Togetherness 0.0164 NA NA
Feel Close 0.0143 NA NA
Family Respect 0.0138 NA NA
Family Pride 0.0135 NA NA
Helpful Family 0.0135 NA NA
Helpful Friends 0.0130 NA NA
See Friends 0.0120 NA NA
Helpful Community 0.0116 NA NA
Employment 0.0112 NA NA
Spend Time Together 0.0111 NA NA
Religious Importance 0.0109 NA NA
Income_median 0.0109 NA NA
Similar Values 0.0105 NA NA
Successful Family 0.0096 NA NA
Religious Attendance 0.0096 NA NA
Community Trust 0.0086 NA NA
See Family 0.0083 NA NA
get.imbalanced.performance(imb) n.majority n.minority iratio threshold sens spec
1808.0000000 359.0000000 5.0362117 0.1656668 0.6573816 0.4828540
prec npv misclass brier brier.norm auc
0.2015371 0.8765060 0.4882326 0.1373117 0.5492467 0.5910608
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
0.3084967 0.4125853 0.1656668 0.2323972 0.4359483 0.4879926
gmean
0.5633998
# Horizontal bar chart of the overall ("all" column) variable importance from
# the full-data forest `imb`, with bars ordered by importance.
var_importance <- imb$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_minimal()
print(importance_plot)
Training/Test set Variable Importance
Training Importance
# Stratified ~70/30 train/test split: rows of each outcome class are assigned
# independently so that both classes end up in `train` and in `test`.
# The outcome factor is coded "0"/"Yes" (see the ps() tabulation for this
# outcome), so the negative class must be matched on "0". Matching on "No"
# selects zero rows and leaves `train` with a single class.
pos <- rfdata |> filter(Urgentcare == "Yes")
neg <- rfdata |> filter(Urgentcare == "0")
# Fail fast if either class is missing instead of fitting a degenerate forest.
stopifnot(nrow(pos) > 0, nrow(neg) > 0)
set.seed(222)
ind_pos <- sample(c(0, 1), nrow(pos), replace = TRUE, prob = c(0.7, 0.3))
ind_neg <- sample(c(0, 1), nrow(neg), replace = TRUE, prob = c(0.7, 0.3))
train <- bind_rows(pos[ind_pos == 0, ], neg[ind_neg == 0, ])
test <- bind_rows(pos[ind_pos == 1, ], neg[ind_neg == 1, ])
# Commented-out alternative rfsrc() call; note its formula targets `Family`
# rather than this section's outcome.
# rfsrc(Family~.,data=rfdata, importance="permute", perf.type="gmean",block.size = 10) ->rfobj
# Fit the imbalanced forest on the training split only (gini splitting,
# G-mean performance measure, variable importance requested).
# NOTE(review): the warning and printout captured in this rendering report an
# empty outcome class in `train`; confirm both classes are present and re-render.
rfobj <- imbalanced(`Urgentcare`~ .,importance=T,data=train,
perf.type = "gmean",splitrule="gini")Warning in rfsrc(formula = Urgentcare ~ ., data = structure(list(Urgentcare = structure(c(2L, : empty classes found when implementing classification
print(rfobj) Sample size: 251
Frequency of class labels: NA, 251
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 159
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 251 0 1
(OOB) Misclassification rate: 1
print(rfobj) Sample size: 251
Frequency of class labels: NA, 251
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 159
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 251 0 1
(OOB) Misclassification rate: 1
plot(rfobj,plots.one.page = FALSE)

all 0 Yes
Community Trust 0 NA NA
Get Along 0 NA NA
Community Shares Values 0 NA NA
Helpful Community 0 NA NA
Close-knit Community 0 NA NA
Religious Importance 0 NA NA
Religious Attendance 0 NA NA
Togetherness 0 NA NA
Feel Close 0 NA NA
Spend Time Together 0 NA NA
Expression 0 NA NA
Family Pride 0 NA NA
Loyalty 0 NA NA
Trust 0 NA NA
Successful Family 0 NA NA
Similar Values 0 NA NA
Family Respect 0 NA NA
Helpful Friends 0 NA NA
Close Friends 0 NA NA
See Friends 0 NA NA
Helpful Family 0 NA NA
Close Family 0 NA NA
See Family 0 NA NA
EnglishDiff 0 NA NA
EnglishSpeak 0 NA NA
Income_median 0 NA NA
rfobj$importance all 0 Yes
Ethnicity 0 NA NA
Age 0 NA NA
Gender 0 NA NA
Religion 0 NA NA
Employment 0 NA NA
Income_median 0 NA NA
EnglishSpeak 0 NA NA
EnglishDiff 0 NA NA
See Family 0 NA NA
Close Family 0 NA NA
Helpful Family 0 NA NA
See Friends 0 NA NA
Close Friends 0 NA NA
Helpful Friends 0 NA NA
Family Respect 0 NA NA
Similar Values 0 NA NA
Successful Family 0 NA NA
Trust 0 NA NA
Loyalty 0 NA NA
Family Pride 0 NA NA
Expression 0 NA NA
Spend Time Together 0 NA NA
Feel Close 0 NA NA
Togetherness 0 NA NA
Religious Attendance 0 NA NA
Religious Importance 0 NA NA
Close-knit Community 0 NA NA
Helpful Community 0 NA NA
Community Shares Values 0 NA NA
Get Along 0 NA NA
Community Trust 0 NA NA
# Horizontal bar chart of the overall ("all" column) variable importance
# stored on the training-split forest `rfobj`, with bars ordered by importance.
var_importance <- rfobj$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_bw()
print(importance_plot)
Test Set Importance
# Score the held-out `test` rows with the training-split forest and report the
# imbalanced-classification metrics for those predictions.
# The performance call must receive the prediction object: passing `rfobj`
# re-reports the training (OOB) metrics and leaves `test_rf` unused.
test_rf <- predict(rfobj, newdata = test)
get.imbalanced.performance(test_rf) n.majority n.minority iratio threshold sens spec
251 0 Inf 0 NaN 0
prec npv misclass brier brier.norm auc
0 NaN 1 0 0 NaN
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
NaN NaN NA NA NaN NaN
gmean
NaN
Folk Medicine
ps(`Folkmedicine`)# A tibble: 3 × 3
Folkmedicine n pct
<fct> <int> <dbl>
1 0 2189 83.9
2 Yes 348 13.3
3 <NA> 72 2.76
Random Forest (randomForestSRC)
# install.packages("randomForestSRC")
# Modelling frame for the Folkmedicine outcome: keep the outcome and the
# selected predictors, drop rows with any missing value, shorten three column
# names, and convert to a plain data.frame before fitting.
rfdata <- qol |>
  select(
    Folkmedicine, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median, `English Speaking`,
    `English Difficulties`, `See Family`:`Community Trust`
  ) |>
  na.omit() |>
  rename(
    Employment = `Full Time Employment`,
    EnglishSpeak = `English Speaking`,
    EnglishDiff = `English Difficulties`
  ) |>
  as.data.frame()

# Imbalanced two-class forest on the full modelling frame, with variable
# importance requested and G-mean as the reported performance measure.
imb <- imbalanced(
  Folkmedicine ~ .,
  data = rfdata,
  importance = TRUE,
  perf.type = "gmean",
  splitrule = "gini"
)
print(imb) Sample size: 2152
Frequency of class labels: 1866, 286
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 271.6027
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 1360
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: 6.5245
(OOB) Brier score: 0.11195505
(OOB) Normalized Brier score: 0.44782019
(OOB) AUC: 0.66685311
(OOB) PR-AUC: 0.21743813
(OOB) G-mean: 0.62098892
(OOB) Requested performance error: 0.37901108
Confusion matrix:
predicted
observed 0 Yes class.error
0 1029 837 0.4486
Yes 86 200 0.3007
(OOB) Misclassification rate: 0.4289033
# Draw the default diagnostic plots for the full-data forest without combining
# them onto a single page.
plot(imb, plots.one.page = FALSE)

all 0 Yes
Ethnicity 0.0446 NA NA
Age 0.0441 NA NA
EnglishSpeak 0.0130 NA NA
Family Pride 0.0088 NA NA
Employment 0.0082 NA NA
Helpful Friends 0.0080 NA NA
Successful Family 0.0064 NA NA
Expression 0.0052 NA NA
Feel Close 0.0049 NA NA
Community Trust 0.0040 NA NA
Close Friends 0.0037 NA NA
EnglishDiff 0.0037 NA NA
Community Shares Values 0.0030 NA NA
Togetherness 0.0027 NA NA
Loyalty 0.0025 NA NA
Religious Importance 0.0024 NA NA
Trust 0.0023 NA NA
See Friends 0.0023 NA NA
Religion 0.0021 NA NA
Close Family 0.0021 NA NA
Spend Time Together 0.0016 NA NA
Get Along 0.0009 NA NA
Family Respect 0.0006 NA NA
Close-knit Community 0.0002 NA NA
Similar Values -0.0003 NA NA
Helpful Family -0.0010 NA NA
get.imbalanced.performance(imb) n.majority n.minority iratio threshold sens spec
1866.0000000 286.0000000 6.5244755 0.1328996 0.6993007 0.5514469
prec npv misclass brier brier.norm auc
0.1928640 0.9228700 0.4289033 0.1119550 0.4478202 0.6668531
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
0.3023432 0.4205220 0.1328996 0.2174381 0.4616660 0.5207555
gmean
0.6209889
# Horizontal bar chart of the overall ("all" column) variable importance from
# the full-data forest `imb`, with bars ordered by importance.
var_importance <- imb$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_minimal()
print(importance_plot)
Training/Test set Variable Importance
Training Importance
# Stratified ~70/30 train/test split: rows of each outcome class are assigned
# independently so that both classes end up in `train` and in `test`.
# The outcome factor is coded "0"/"Yes" (see the ps() tabulation for this
# outcome), so the negative class must be matched on "0". Matching on "No"
# selects zero rows and leaves `train` with a single class.
pos <- rfdata |> filter(Folkmedicine == "Yes")
neg <- rfdata |> filter(Folkmedicine == "0")
# Fail fast if either class is missing instead of fitting a degenerate forest.
stopifnot(nrow(pos) > 0, nrow(neg) > 0)
set.seed(222)
ind_pos <- sample(c(0, 1), nrow(pos), replace = TRUE, prob = c(0.7, 0.3))
ind_neg <- sample(c(0, 1), nrow(neg), replace = TRUE, prob = c(0.7, 0.3))
train <- bind_rows(pos[ind_pos == 0, ], neg[ind_neg == 0, ])
test <- bind_rows(pos[ind_pos == 1, ], neg[ind_neg == 1, ])
# Commented-out alternative rfsrc() call; note its formula targets `Family`
# rather than this section's outcome.
# rfsrc(Family~.,data=rfdata, importance="permute", perf.type="gmean",block.size = 10) ->rfobj
# Fit the imbalanced forest on the training split only (gini splitting,
# G-mean performance measure, variable importance requested).
# NOTE(review): the warning and printout captured in this rendering report an
# empty outcome class in `train`; confirm both classes are present and re-render.
rfobj <- imbalanced(`Folkmedicine` ~ .,importance=T,data=train,
perf.type = "gmean",splitrule="gini")Warning in rfsrc(formula = Folkmedicine ~ ., data = structure(list(Folkmedicine = structure(c(2L, : empty classes found when implementing classification
print(rfobj) Sample size: 200
Frequency of class labels: NA, 200
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 126
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 200 0 1
(OOB) Misclassification rate: 1
print(rfobj) Sample size: 200
Frequency of class labels: NA, 200
Number of trees: 3000
Forest terminal node size: 1
Average no. of terminal nodes: 1
No. of variables tried at each split: 6
Total no. of variables: 31
Resampling used to grow trees: swor
Resample size used to grow trees: 126
Analysis: RFQ
Family: class
Splitting rule: gini *random*
Number of random split points: 10
Imbalanced ratio: Inf
(OOB) Brier score: 0
(OOB) Normalized Brier score: 0
(OOB) AUC: NaN
(OOB) PR-AUC: NA
(OOB) G-mean: NaN
(OOB) Requested performance error: 1
Confusion matrix:
predicted
observed 0 Yes class.error
0 0 0 NaN
Yes 200 0 1
(OOB) Misclassification rate: 1
plot(rfobj,plots.one.page = FALSE)

all 0 Yes
Community Trust 0 NA NA
Get Along 0 NA NA
Community Shares Values 0 NA NA
Helpful Community 0 NA NA
Close-knit Community 0 NA NA
Religious Importance 0 NA NA
Religious Attendance 0 NA NA
Togetherness 0 NA NA
Feel Close 0 NA NA
Spend Time Together 0 NA NA
Expression 0 NA NA
Family Pride 0 NA NA
Loyalty 0 NA NA
Trust 0 NA NA
Successful Family 0 NA NA
Similar Values 0 NA NA
Family Respect 0 NA NA
Helpful Friends 0 NA NA
Close Friends 0 NA NA
See Friends 0 NA NA
Helpful Family 0 NA NA
Close Family 0 NA NA
See Family 0 NA NA
EnglishDiff 0 NA NA
EnglishSpeak 0 NA NA
Income_median 0 NA NA
rfobj$importance all 0 Yes
Ethnicity 0 NA NA
Age 0 NA NA
Gender 0 NA NA
Religion 0 NA NA
Employment 0 NA NA
Income_median 0 NA NA
EnglishSpeak 0 NA NA
EnglishDiff 0 NA NA
See Family 0 NA NA
Close Family 0 NA NA
Helpful Family 0 NA NA
See Friends 0 NA NA
Close Friends 0 NA NA
Helpful Friends 0 NA NA
Family Respect 0 NA NA
Similar Values 0 NA NA
Successful Family 0 NA NA
Trust 0 NA NA
Loyalty 0 NA NA
Family Pride 0 NA NA
Expression 0 NA NA
Spend Time Together 0 NA NA
Feel Close 0 NA NA
Togetherness 0 NA NA
Religious Attendance 0 NA NA
Religious Importance 0 NA NA
Close-knit Community 0 NA NA
Helpful Community 0 NA NA
Community Shares Values 0 NA NA
Get Along 0 NA NA
Community Trust 0 NA NA
# Horizontal bar chart of the overall ("all" column) variable importance
# stored on the training-split forest `rfobj`, with bars ordered by importance.
var_importance <- rfobj$importance[, "all"]
var_importance_df <- data.frame(
  variable = names(var_importance),
  importance = var_importance
)
importance_plot <- var_importance_df |>
  ggplot(aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "#F8766D") +
  coord_flip() +
  labs(title = "Variable Importance", x = "Variable", y = "Importance") +
  theme_bw()
print(importance_plot)
Test Set Importance
# Score the held-out `test` rows with the training-split forest and report the
# imbalanced-classification metrics for those predictions.
# The performance call must receive the prediction object: passing `rfobj`
# re-reports the training (OOB) metrics and leaves `test_rf` unused.
test_rf <- predict(rfobj, newdata = test)
get.imbalanced.performance(test_rf) n.majority n.minority iratio threshold sens spec
200 0 Inf 0 NaN 0
prec npv misclass brier brier.norm auc
0 NaN 1 0 0 NaN
F1 F1mod pr.auc.rand pr.auc F1gmean F1modgmean
NaN NaN NA NA NaN NaN
gmean
NaN